We set the seed to 1178 for reproducibility. This seed will remain the same for each model run.
# Fix the random number generator seed so repeated runs draw the same numbers.
set.seed(seed = 1178)
# Set the global option that asks R to treat strings as factors.
options(stringsAsFactors = TRUE)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Load the raw training and testing sets and drop unusable columns.
# NOTE(review): setwd() to an absolute, user-specific path makes this script
# non-portable. It is kept unchanged here because code outside this section
# may rely on the working directory.
setwd("/Users/richardcoleman/Git/")
# Request factor conversion explicitly. R >= 4.0 defaults to
# stringsAsFactors = FALSE and the global option is deprecated, so relying on
# options() alone can leave text columns such as classe as character vectors.
dfTraining <- read.csv("pml-training.csv", row.names = NULL,
                       stringsAsFactors = TRUE)
dfTesting <- read.csv("pml-testing.csv", row.names = NULL,
                      stringsAsFactors = TRUE)
# Remove the X column from both data sets.
dfTraining$X <- NULL
dfTesting$X <- NULL
# Keep only the training columns that contain no NA values. drop = FALSE
# guarantees the result stays a data frame even if a single column survives.
# NOTE(review): blank strings and "#DIV/0!" entries are not parsed as NA (the
# describe() output reports 0 missing for columns made up of them), so those
# columns pass this filter. Consider na.strings in read.csv() if they should
# be treated as missing.
dfTraining <- dfTraining[, colSums(is.na(dfTraining)) == 0, drop = FALSE]
library(Hmisc)
## Loading required package: grid
## Loading required package: survival
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:caret':
##
## cluster
##
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library(ggplot2)
# Print a per-column summary of dfTraining (counts, missing values, distinct
# values and, for numeric columns, quantiles).
Hmisc::describe(dfTraining)
## dfTraining
##
## 92 Variables 19622 Observations
## ---------------------------------------------------------------------------
## user_name
## n missing unique
## 19622 0 6
##
## adelmo carlitos charles eurico jeremy pedro
## Frequency 3892 3112 3536 3070 3402 2610
## % 20 16 18 16 17 13
## ---------------------------------------------------------------------------
## raw_timestamp_part_1
## n missing unique Info Mean .05 .10
## 19622 0 837 1 1.323e+09 1.322e+09 1.322e+09
## .25 .50 .75 .90 .95
## 1.323e+09 1.323e+09 1.323e+09 1.323e+09 1.323e+09
##
## lowest : 1322489605 1322489606 1322489607 1322489608 1322489609
## highest: 1323095077 1323095078 1323095079 1323095080 1323095081
## ---------------------------------------------------------------------------
## raw_timestamp_part_2
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 16783 1 500656 48389 100343 252912 496380
## .75 .90 .95
## 751891 900367 950649
##
## lowest : 294 301 307 309 312
## highest: 998716 998741 998749 998750 998801
## ---------------------------------------------------------------------------
## cvtd_timestamp
## n missing unique
## 19622 0 20
##
## lowest : 02/12/2011 13:32 02/12/2011 13:33 02/12/2011 13:34 02/12/2011 13:35 02/12/2011 14:56
## highest: 28/11/2011 14:14 28/11/2011 14:15 30/11/2011 17:10 30/11/2011 17:11 30/11/2011 17:12
## ---------------------------------------------------------------------------
## new_window
## n missing unique
## 19622 0 2
##
## no (19216, 98%), yes (406, 2%)
## ---------------------------------------------------------------------------
## num_window
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 858 1 430.6 44 88 222 424
## .75 .90 .95
## 644 780 821
##
## lowest : 1 2 3 4 5, highest: 860 861 862 863 864
## ---------------------------------------------------------------------------
## roll_belt
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1330 1 64.41 -0.20 0.53 1.10 113.00
## .75 .90 .95
## 123.00 129.00 139.00
##
## lowest : -28.9 -28.8 -28.6 -28.4 -28.3
## highest: 158.0 159.0 160.0 161.0 162.0
## ---------------------------------------------------------------------------
## pitch_belt
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1840 1 0.3053 -43.60 -42.10 1.76 5.28
## .75 .90 .95
## 14.90 25.40 26.20
##
## lowest : -55.8 -54.9 -54.7 -54.4 -53.9
## highest: 59.9 60.0 60.1 60.2 60.3
## ---------------------------------------------------------------------------
## yaw_belt
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1957 1 -11.21 -93.5 -92.9 -88.3 -13.0
## .75 .90 .95
## 12.9 165.0 168.0
##
## lowest : -180 -179 -178 -177 -176, highest: 175 176 177 178 179
## ---------------------------------------------------------------------------
## total_accel_belt
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 29 0.98 11.31 2 3 3 17
## .75 .90 .95
## 18 20 21
##
## lowest : 0 1 2 3 4, highest: 25 26 27 28 29
## ---------------------------------------------------------------------------
## kurtosis_roll_belt
## n missing unique
## 19622 0 397
##
## lowest : -0.016850 -0.021024 -0.025513 -0.033935
## highest: 5.587755 5.681869 6.545935 7.004355 7.515290
## ---------------------------------------------------------------------------
## kurtosis_picth_belt
## n missing unique
## 19622 0 317
##
## lowest : -0.021887 -0.060755 -0.099173 -0.108371
## highest: 8.953960 9.042959 9.296951 9.804491 9.896970
## ---------------------------------------------------------------------------
## kurtosis_yaw_belt
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## skewness_roll_belt
## n missing unique
## 19622 0 395
##
## lowest : -0.003095 -0.010002 -0.014020 -0.015465
## highest: 2.058296 2.097857 2.674649 2.713152 3.595369
## ---------------------------------------------------------------------------
## skewness_roll_belt.1
## n missing unique
## 19622 0 338
##
## lowest : -0.005928 -0.005960 -0.008391 -0.017954
## highest: 6.164414 6.708204 6.782330 6.855655 7.348469
## ---------------------------------------------------------------------------
## skewness_yaw_belt
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## max_yaw_belt
## n missing unique
## 19622 0 68
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 5.6 5.7 6.5 7.0 7.5
## ---------------------------------------------------------------------------
## min_yaw_belt
## n missing unique
## 19622 0 68
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 5.6 5.7 6.5 7.0 7.5
## ---------------------------------------------------------------------------
## amplitude_yaw_belt
## n missing unique
## 19622 0 4
##
## (19216, 98%), #DIV/0! (10, 0%), 0.00 (12, 0%)
## 0.0000 (384, 2%)
## ---------------------------------------------------------------------------
## gyros_belt_x
## n missing unique Info Mean .05 .10
## 19622 0 140 1 -0.005592 -0.45 -0.39
## .25 .50 .75 .90 .95
## -0.03 0.03 0.11 0.16 0.21
##
## lowest : -1.04 -1.00 -0.98 -0.96 -0.95
## highest: 1.88 1.98 2.02 2.20 2.22
## ---------------------------------------------------------------------------
## gyros_belt_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 69 0.97 0.03959 -0.05 -0.03 0.00 0.02
## .75 .90 .95
## 0.11 0.13 0.13
##
## lowest : -0.64 -0.53 -0.51 -0.48 -0.45
## highest: 0.51 0.56 0.61 0.63 0.64
## ---------------------------------------------------------------------------
## gyros_belt_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 169 1 -0.1305 -0.49 -0.44 -0.20 -0.10
## .75 .90 .95
## -0.02 0.02 0.11
##
## lowest : -1.46 -1.35 -1.33 -1.30 -1.28
## highest: 1.41 1.44 1.51 1.61 1.62
## ---------------------------------------------------------------------------
## accel_belt_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 164 1 -5.595 -42 -39 -21 -15
## .75 .90 .95
## -5 49 51
##
## lowest : -120 -83 -82 -81 -80, highest: 78 79 81 83 85
## ---------------------------------------------------------------------------
## accel_belt_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 143 1 30.15 -1 1 3 35
## .75 .90 .95
## 61 68 71
##
## lowest : -69 -65 -54 -41 -38, highest: 109 121 149 150 164
## ---------------------------------------------------------------------------
## accel_belt_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 299 1 -72.59 -190 -175 -162 -152
## .75 .90 .95
## 27 42 45
##
## lowest : -275 -269 -268 -266 -265, highest: 101 102 103 104 105
## ---------------------------------------------------------------------------
## magnet_belt_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 327 1 55.6 -4 0 9 35
## .75 .90 .95
## 59 167 173
##
## lowest : -52 -49 -48 -46 -45, highest: 474 476 479 481 485
## ---------------------------------------------------------------------------
## magnet_belt_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 298 1 593.7 525.1 564.0 581.0 601.0
## .75 .90 .95
## 610.0 631.0 635.0
##
## lowest : 354 359 360 363 365, highest: 666 667 668 669 673
## ---------------------------------------------------------------------------
## magnet_belt_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 457 1 -345.5 -452 -431 -375 -320
## .75 .90 .95
## -306 -297 -291
##
## lowest : -623 -621 -620 -618 -616, highest: 284 286 287 289 293
## ---------------------------------------------------------------------------
## roll_arm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 2654 0.99 17.83 -117.00 -70.09 -31.78 0.00
## .75 .90 .95
## 77.30 113.00 135.00
##
## lowest : -180 -178 -177 -176 -175, highest: 176 177 178 179 180
## ---------------------------------------------------------------------------
## pitch_arm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 3087 0.99 -4.612 -56.0 -42.6 -25.9 0.0
## .75 .90 .95
## 11.2 34.6 51.1
##
## lowest : -88.8 -88.2 -87.9 -87.8 -87.7
## highest: 86.6 86.8 87.1 88.2 88.5
## ---------------------------------------------------------------------------
## yaw_arm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 2876 0.99 -0.6188 -128.00 -92.09 -43.10 0.00
## .75 .90 .95
## 45.88 102.00 116.00
##
## lowest : -180 -179 -178 -177 -176, highest: 176 177 178 179 180
## ---------------------------------------------------------------------------
## total_accel_arm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 66 1 25.51 8 11 17 27
## .75 .90 .95
## 33 38 42
##
## lowest : 1 2 3 4 5, highest: 62 63 64 65 66
## ---------------------------------------------------------------------------
## gyros_arm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 643 1 0.04277 -3.48 -2.79 -1.33 0.08
## .75 .90 .95
## 1.57 2.59 3.12
##
## lowest : -6.37 -6.36 -6.34 -6.13 -6.12
## highest: 4.70 4.74 4.78 4.82 4.87
## ---------------------------------------------------------------------------
## gyros_arm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 376 1 -0.2571 -1.56 -1.25 -0.80 -0.24
## .75 .90 .95
## 0.14 0.85 1.27
##
## lowest : -3.44 -3.40 -3.37 -3.32 -3.29
## highest: 2.76 2.78 2.79 2.81 2.84
## ---------------------------------------------------------------------------
## gyros_arm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 248 1 0.2695 -0.67 -0.43 -0.07 0.23
## .75 .90 .95
## 0.72 0.95 1.10
##
## lowest : -2.33 -2.28 -2.17 -2.13 -2.10
## highest: 2.66 2.69 2.95 2.99 3.02
## ---------------------------------------------------------------------------
## accel_arm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 777 1 -60.24 -289 -280 -242 -44
## .75 .90 .95
## 84 150 245
##
## lowest : -404 -383 -377 -371 -367, highest: 430 431 434 435 437
## ---------------------------------------------------------------------------
## accel_arm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 537 1 32.6 -134 -102 -54 14
## .75 .90 .95
## 139 173 199
##
## lowest : -318 -315 -302 -301 -286, highest: 296 297 299 303 308
## ---------------------------------------------------------------------------
## accel_arm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 792 1 -71.25 -340 -249 -143 -47
## .75 .90 .95
## 23 75 116
##
## lowest : -636 -630 -629 -613 -612, highest: 239 242 245 271 292
## ---------------------------------------------------------------------------
## magnet_arm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1339 1 191.7 -422 -403 -300 289
## .75 .90 .95
## 637 728 744
##
## lowest : -584 -580 -579 -578 -576, highest: 777 778 779 780 782
## ---------------------------------------------------------------------------
## magnet_arm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 872 1 156.6 -200 -141 -9 202
## .75 .90 .95
## 323 391 435
##
## lowest : -392 -386 -384 -381 -377, highest: 577 578 580 582 583
## ---------------------------------------------------------------------------
## magnet_arm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1265 1 306.5 -420.0 -242.0 131.2 444.0
## .75 .90 .95
## 545.0 597.0 620.0
##
## lowest : -597 -596 -595 -590 -586, highest: 685 687 690 693 694
## ---------------------------------------------------------------------------
## kurtosis_roll_arm
## n missing unique
## 19622 0 330
##
## lowest : -0.02438 -0.04190 -0.05051 -0.05695
## highest: 3.96713 4.66566 5.50673 6.21009 7.66917
## ---------------------------------------------------------------------------
## kurtosis_picth_arm
## n missing unique
## 19622 0 328
##
## lowest : -0.00484 -0.01311 -0.02967 -0.07394
## highest: 4.41716 6.25063 6.39832 7.79477 9.16615
## ---------------------------------------------------------------------------
## kurtosis_yaw_arm
## n missing unique
## 19622 0 395
##
## lowest : -0.01548 -0.01749 -0.02101 -0.04059
## highest: 5.43713 5.46450 50.00000 56.00000 6.97222
## ---------------------------------------------------------------------------
## skewness_roll_arm
## n missing unique
## 19622 0 331
##
## lowest : -0.00051 -0.00696 -0.01884 -0.03359
## highest: 1.71066 2.09387 2.41765 4.15709 4.39449
## ---------------------------------------------------------------------------
## skewness_pitch_arm
## n missing unique
## 19622 0 328
##
## lowest : -0.00184 -0.01185 -0.01247 -0.02063
## highest: 2.11434 2.16019 2.19479 2.65520 3.04295
## ---------------------------------------------------------------------------
## skewness_yaw_arm
## n missing unique
## 19622 0 395
##
## lowest : -0.00311 -0.00562 -0.00800 -0.01697
## highest: 1.82084 2.18029 4.46409 7.07107 7.48331
## ---------------------------------------------------------------------------
## roll_dumbbell
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 16523 1 23.84 -110.15 -99.51 -18.49 48.17
## .75 .90 .95
## 67.61 102.74 116.28
##
## lowest : -153.7 -153.5 -152.8 -152.4 -152.0
## highest: 151.0 151.4 152.1 153.4 153.5
## ---------------------------------------------------------------------------
## pitch_dumbbell
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 16040 1 -10.78 -56.44 -51.70 -40.89 -20.96
## .75 .90 .95
## 17.50 38.51 51.34
##
## lowest : -149.6 -148.5 -137.3 -134.7 -130.1
## highest: 127.9 129.5 129.8 137.0 149.4
## ---------------------------------------------------------------------------
## yaw_dumbbell
## n missing unique Info Mean .05 .10 .25
## 19622 0 16381 1 1.674 -102.418 -95.474 -77.644
## .50 .75 .90 .95
## -3.324 79.643 121.218 129.065
##
## lowest : -150.9 -148.8 -147.1 -146.2 -144.3
## highest: 154.1 154.2 154.5 154.8 155.0
## ---------------------------------------------------------------------------
## kurtosis_roll_dumbbell
## n missing unique
## 19622 0 398
##
## lowest : -0.0035 -0.0073 -0.0115 -0.0262
## highest: 6.1973 7.4175 7.5633 7.9609 8.9336
## ---------------------------------------------------------------------------
## kurtosis_picth_dumbbell
## n missing unique
## 19622 0 401
##
## lowest : -0.0163 -0.0233 -0.0280 -0.0308
## highest: 5.7918 5.8136 5.9003 55.6279 9.5485
## ---------------------------------------------------------------------------
## kurtosis_yaw_dumbbell
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## skewness_roll_dumbbell
## n missing unique
## 19622 0 401
##
## lowest : -0.0082 -0.0096 -0.0172 -0.0224
## highest: 1.5574 1.5964 1.7210 1.9255 1.9579
## ---------------------------------------------------------------------------
## skewness_pitch_dumbbell
## n missing unique
## 19622 0 402
##
## lowest : -0.0053 -0.0084 -0.0166 -0.0452
## highest: 1.7872 1.8951 2.0773 2.5456 3.7687
## ---------------------------------------------------------------------------
## skewness_yaw_dumbbell
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## max_yaw_dumbbell
## n missing unique
## 19622 0 73
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 6.2 7.4 7.6 8.0 8.9
## ---------------------------------------------------------------------------
## min_yaw_dumbbell
## n missing unique
## 19622 0 73
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 6.2 7.4 7.6 8.0 8.9
## ---------------------------------------------------------------------------
## amplitude_yaw_dumbbell
## n missing unique
## 19622 0 3
##
## (19216, 98%), #DIV/0! (5, 0%), 0.00 (401, 2%)
## ---------------------------------------------------------------------------
## total_accel_dumbbell
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 43 1 13.72 2 2 4 10
## .75 .90 .95
## 19 30 31
##
## lowest : 0 1 2 3 4, highest: 38 39 40 42 58
## ---------------------------------------------------------------------------
## gyros_dumbbell_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 241 1 0.1611 -0.37 -0.21 -0.03 0.13
## .75 .90 .95
## 0.35 0.59 0.83
##
## lowest : -204.00 -1.99 -1.94 -1.86 -1.85
## highest: 2.07 2.14 2.17 2.20 2.22
## ---------------------------------------------------------------------------
## gyros_dumbbell_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 278 1 0.04606 -0.80 -0.48 -0.14 0.03
## .75 .90 .95
## 0.21 0.56 0.92
##
## lowest : -2.10 -2.07 -2.06 -2.04 -2.01
## highest: 2.63 2.71 2.73 4.37 52.00
## ---------------------------------------------------------------------------
## gyros_dumbbell_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 206 1 -0.129 -0.67 -0.49 -0.31 -0.13
## .75 .90 .95
## 0.03 0.20 0.33
##
## lowest : -2.38 -2.30 -2.08 -2.00 -1.95
## highest: 1.61 1.67 1.72 1.87 317.00
## ---------------------------------------------------------------------------
## accel_dumbbell_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 425 1 -28.62 -145 -140 -50 -8
## .75 .90 .95
## 11 29 53
##
## lowest : -419 -237 -236 -235 -234, highest: 217 219 224 234 235
## ---------------------------------------------------------------------------
## accel_dumbbell_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 466 1 52.63 -61.0 -41.0 -8.0 41.5
## .75 .90 .95
## 111.0 161.0 194.0
##
## lowest : -189 -182 -181 -179 -176, highest: 299 300 302 310 315
## ---------------------------------------------------------------------------
## accel_dumbbell_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 410 1 -38.32 -206 -197 -142 -1
## .75 .90 .95
## 38 88 98
##
## lowest : -334 -319 -284 -273 -272, highest: 314 315 316 317 318
## ---------------------------------------------------------------------------
## magnet_dumbbell_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1128 1 -328.5 -579.0 -564.0 -535.0 -479.0
## .75 .90 .95
## -304.0 445.9 520.0
##
## lowest : -643 -639 -638 -637 -635, highest: 579 582 583 584 592
## ---------------------------------------------------------------------------
## magnet_dumbbell_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 844 1 221 -562 -516 231 311
## .75 .90 .95
## 390 500 551
##
## lowest : -3600 -744 -742 -741 -740
## highest: 629 630 631 632 633
## ---------------------------------------------------------------------------
## magnet_dumbbell_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 676 1 46.05 -152 -93 -45 13
## .75 .90 .95
## 95 294 345
##
## lowest : -262 -250 -249 -248 -245, highest: 442 443 447 451 452
## ---------------------------------------------------------------------------
## roll_forearm
## n missing unique Info Mean .05 .10
## 19622 0 2176 0.99 33.83 -175.0000 -154.0000
## .25 .50 .75 .90 .95
## -0.7375 21.7000 140.0000 162.0000 174.0000
##
## lowest : -180 -179 -178 -177 -176, highest: 176 177 178 179 180
## ---------------------------------------------------------------------------
## pitch_forearm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 2915 0.99 10.71 -51.40 -19.20 0.00 9.24
## .75 .90 .95
## 28.40 46.10 54.00
##
## lowest : -72.5 -72.4 -72.1 -71.6 -71.4
## highest: 87.5 87.9 88.4 88.7 89.8
## ---------------------------------------------------------------------------
## yaw_forearm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1991 0.99 19.21 -153.0 -138.0 -68.6 0.0
## .75 .90 .95
## 110.0 151.0 163.0
##
## lowest : -180 -179 -178 -177 -176, highest: 176 177 178 179 180
## ---------------------------------------------------------------------------
## kurtosis_roll_forearm
## n missing unique
## 19622 0 322
##
## lowest : -0.0227 -0.0359 -0.0567 -0.0781
## highest: 3.4833 4.3514 4.5821 40.0597 8.8039
## ---------------------------------------------------------------------------
## kurtosis_picth_forearm
## n missing unique
## 19622 0 323
##
## lowest : -0.0073 -0.0442 -0.0489 -0.0523
## highest: 8.8271 8.8831 9.1693 9.5584 9.9138
## ---------------------------------------------------------------------------
## kurtosis_yaw_forearm
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## skewness_roll_forearm
## n missing unique
## 19622 0 323
##
## lowest : -0.0004 -0.0013 -0.0063 -0.0088
## highest: 1.2817 1.8676 1.9796 2.6579 5.8557
## ---------------------------------------------------------------------------
## skewness_pitch_forearm
## n missing unique
## 19622 0 319
##
## lowest : -0.0113 -0.0131 -0.0405 -0.0478
## highest: 2.4216 2.5226 2.7813 3.5998 4.4641
## ---------------------------------------------------------------------------
## skewness_yaw_forearm
## n missing unique
## 19622 0 2
##
## (19216, 98%), #DIV/0! (406, 2%)
## ---------------------------------------------------------------------------
## max_yaw_forearm
## n missing unique
## 19622 0 45
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 3.5 4.4 4.6 40.1 8.8
## ---------------------------------------------------------------------------
## min_yaw_forearm
## n missing unique
## 19622 0 45
##
## lowest : -0.1 -0.2 -0.3 -0.4, highest: 3.5 4.4 4.6 40.1 8.8
## ---------------------------------------------------------------------------
## amplitude_yaw_forearm
## n missing unique
## 19622 0 3
##
## (19216, 98%), #DIV/0! (84, 0%), 0.00 (322, 2%)
## ---------------------------------------------------------------------------
## total_accel_forearm
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 70 1 34.72 15 23 29 36
## .75 .90 .95
## 41 47 50
##
## lowest : 0 1 2 3 4, highest: 68 73 78 79 108
## ---------------------------------------------------------------------------
## gyros_forearm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 298 1 0.158 -0.75 -0.55 -0.22 0.05
## .75 .90 .95
## 0.56 1.04 1.24
##
## lowest : -22.00 -4.95 -3.36 -3.08 -2.99
## highest: 3.10 3.26 3.48 3.52 3.97
## ---------------------------------------------------------------------------
## gyros_forearm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 741 1 0.07517 -3.61 -2.94 -1.46 0.03
## .75 .90 .95
## 1.62 2.83 3.50
##
## lowest : -7.02 -6.65 -6.62 -6.54 -6.52
## highest: 6.09 6.10 6.12 6.13 311.00
## ---------------------------------------------------------------------------
## gyros_forearm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 307 1 0.1512 -0.82 -0.57 -0.18 0.08
## .75 .90 .95
## 0.49 0.89 1.13
##
## lowest : -8.09 -7.94 -6.99 -5.55 -4.28
## highest: 3.35 4.04 4.10 4.31 231.00
## ---------------------------------------------------------------------------
## accel_forearm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 794 1 -61.65 -392 -331 -178 -57
## .75 .90 .95
## 76 188 223
##
## lowest : -498 -496 -487 -479 -477, highest: 370 375 381 389 477
## ---------------------------------------------------------------------------
## accel_forearm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1003 1 163.7 -227 -140 57 201
## .75 .90 .95
## 312 395 436
##
## lowest : -632 -595 -585 -537 -496, highest: 588 589 590 591 923
## ---------------------------------------------------------------------------
## accel_forearm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 580 1 -55.29 -222.0 -209.0 -182.0 -39.0
## .75 .90 .95
## 26.0 175.9 199.0
##
## lowest : -446 -410 -391 -386 -381, highest: 275 277 285 287 291
## ---------------------------------------------------------------------------
## magnet_forearm_x
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1524 1 -312.6 -700 -684 -616 -378
## .75 .90 .95
## -73 128 379
##
## lowest : -1280 -1270 -1260 -1250 -1240
## highest: 660 661 663 666 672
## ---------------------------------------------------------------------------
## magnet_forearm_y
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1872 1 380.1 -632.9 -472.9 2.0 591.0
## .75 .90 .95
## 737.0 797.0 981.0
##
## lowest : -896 -892 -890 -885 -882, highest: 1430 1440 1450 1460 1480
## ---------------------------------------------------------------------------
## magnet_forearm_z
## n missing unique Info Mean .05 .10 .25 .50
## 19622 0 1683 1 393.6 -453.0 -153.9 191.0 511.0
## .75 .90 .95
## 653.0 736.0 799.0
##
## lowest : -973 -966 -964 -963 -962, highest: 1040 1050 1070 1080 1090
## ---------------------------------------------------------------------------
## classe
## n missing unique
## 19622 0 5
##
## A B C D E
## Frequency 5580 3797 3422 3216 3607
## % 28 19 17 16 18
## ---------------------------------------------------------------------------
# Print the frequency table of the classe column on its own.
Hmisc::describe(dfTraining$classe)
## dfTraining$classe
## n missing unique
## 19622 0 5
##
## A B C D E
## Frequency 5580 3797 3422 3216 3607
## % 28 19 17 16 18
# Plot the distribution of every column of dfTraining, one plot per column.
colNames <- colnames(dfTraining)
# seq_along() yields an empty sequence when there are no columns, unlike
# 1:ncol(), which would count 1, 0.
for (i in seq_along(colNames)) {
  print(
    # Build a one-column data frame so aes() refers to a real column of `data`.
    ggplot(data.frame(value = dfTraining[[i]]), aes(x = value)) +
      # Binning is only meaningful for numeric columns. Categorical columns
      # get a bar chart of counts instead; a histogram on them is the likely
      # source of the "position_stack requires constant width" warning.
      (if (is.numeric(dfTraining[[i]])) geom_histogram() else geom_bar()) +
      labs(x = colNames[i])
  )
}
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: position_stack requires constant width: output may be incorrect
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
We first see that some of our predictors contain missing data. We might want to impute these values before we try to predict our classe variable. We also see that our data contain some outliers, which suggests we may want to transform some predictors. A log transform may be a poor choice here, as many predictors have zero values.
# library(mice)
#
# temp <- dfTraining[,10:20]
#
# impTrain <- mice(temp,m=2)
We have to be careful here, as we have many values centered at zero. We also do not want to introduce negative values to our predictors unless we are sure it makes sense.
# dfTemp <- sapply(dfTraining,is.numeric)
#
# dfTemp <- dfTraining[,dfTemp]
#
# dfTemp <- scale(dfTemp,center=TRUE,scale=TRUE)
#
# colnames(dfTemp) <- paste0(colnames(dfTemp),"_std")
#
# x <- cbind(dfTraining,dfTemp)
# Plot every column of dfTraining against classe, one point plot per column.
colNames <- colnames(dfTraining)
# seq_along() yields an empty sequence when there are no columns, unlike
# 1:ncol(), which would count 1, 0.
for (i in seq_along(colNames)) {
  print(
    # Pair classe with the current column in a small data frame so aes() maps
    # plain column names instead of indexing dfTraining from inside aes().
    ggplot(
      data.frame(classe = dfTraining$classe, value = dfTraining[[i]]),
      aes(x = classe, y = value)
    ) +
      geom_point() +
      labs(y = colNames[i], x = "classe")
  )
}
# First split: createDataPartition() picks row indices based on classe with
# p = 0.7; those rows form buildData and the remaining rows form validation.
inBuild <- createDataPartition(
  y = dfTraining$classe,
  p = 0.7,
  list = FALSE
)
validation <- dfTraining[-inBuild, ]
buildData <- dfTraining[inBuild, ]

# Second split, applied to buildData only: p = 0.6 of its rows become
# training and the rest become testing.
inTrain <- createDataPartition(
  y = buildData$classe,
  p = 0.6,
  list = FALSE
)
testing <- buildData[-inTrain, ]
training <- buildData[inTrain, ]
# library(Hmisc)
#
# corr <- rcorr(training,type="pearson")
# Score each predictor against classe with caret's filterVarImp(). Every
# column except the last one is passed as a predictor, which assumes classe is
# the final column of training (it is listed last in the describe() output).
x <- filterVarImp(
  x = training[, seq_len(ncol(training) - 1)],
  y = training[["classe"]]
)
# Show the per-class importance table.
print(x)
## A B C D E
## user_name 0.5134853 0.5397004 0.5309608 0.5397004 0.5294021
## raw_timestamp_part_1 0.6268866 0.6268866 0.6096734 0.6054419 0.6248057
## raw_timestamp_part_2 0.5073593 0.5132744 0.5092069 0.5078197 0.5132744
## cvtd_timestamp 0.5769398 0.6052389 0.5643904 0.6024619 0.6052389
## new_window 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## num_window 0.6636008 0.6636008 0.6425493 0.5596084 0.6466329
## roll_belt 0.6381112 0.6246830 0.6299681 0.6444705 0.6444705
## pitch_belt 0.5427825 0.5176937 0.5369733 0.5274233 0.5427825
## yaw_belt 0.5658871 0.5666952 0.5552093 0.5583397 0.5666952
## total_accel_belt 0.5734829 0.5453780 0.5349146 0.5453780 0.5734829
## kurtosis_roll_belt 0.4996725 0.5019988 0.5019988 0.5008152 0.5014387
## kurtosis_picth_belt 0.4997124 0.5020494 0.5020494 0.5008486 0.5014252
## kurtosis_yaw_belt 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_belt 0.4996553 0.5020407 0.5020407 0.5008783 0.5014892
## skewness_roll_belt.1 0.4996490 0.5019585 0.5019585 0.5008291 0.5014294
## skewness_yaw_belt 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_belt 0.4996631 0.5019851 0.5019851 0.5008166 0.5014290
## min_yaw_belt 0.4996631 0.5019851 0.5019851 0.5008166 0.5014290
## amplitude_yaw_belt 0.4997005 0.5020082 0.5020082 0.5008243 0.5014484
## gyros_belt_x 0.5182420 0.5169422 0.5120470 0.5182420 0.5093889
## gyros_belt_y 0.5109178 0.5065494 0.5152846 0.5145634 0.5152846
## gyros_belt_z 0.5705812 0.5526004 0.5562532 0.5707359 0.5707359
## accel_belt_x 0.5530465 0.5531390 0.5568565 0.5657906 0.5657906
## accel_belt_y 0.5233597 0.5408436 0.5362591 0.5248257 0.5408436
## accel_belt_z 0.6276412 0.5547544 0.5601100 0.5403503 0.6276412
## magnet_belt_x 0.5751567 0.5779000 0.5745523 0.5515748 0.5779000
## magnet_belt_y 0.7042735 0.6846954 0.6864964 0.6961019 0.7042735
## magnet_belt_z 0.6610409 0.6570988 0.6548968 0.6678376 0.6678376
## roll_arm 0.6135021 0.6135021 0.5796561 0.5711926 0.5659869
## pitch_arm 0.6451702 0.5590683 0.6204978 0.6256854 0.6451702
## yaw_arm 0.5469351 0.5469351 0.5366485 0.5306448 0.5217757
## total_accel_arm 0.6270318 0.5963430 0.6034105 0.6270318 0.6060178
## gyros_arm_x 0.5225574 0.5225574 0.5215749 0.5094470 0.5211644
## gyros_arm_y 0.5327728 0.5327728 0.5229546 0.5285199 0.5316335
## gyros_arm_z 0.5127819 0.5132959 0.5091482 0.5086250 0.5132959
## accel_arm_x 0.7252781 0.6502704 0.6587791 0.7252781 0.6844558
## accel_arm_y 0.5800096 0.5471228 0.5689480 0.5740486 0.5800096
## accel_arm_z 0.6088139 0.5878772 0.6088139 0.5874760 0.5551867
## magnet_arm_x 0.7388055 0.6576832 0.6812789 0.7388055 0.6941739
## magnet_arm_y 0.7342272 0.6158267 0.6818645 0.7342272 0.7033880
## magnet_arm_z 0.6622455 0.6622455 0.6262355 0.6366404 0.6494513
## kurtosis_roll_arm 0.4996742 0.5020306 0.5020306 0.5008642 0.5014921
## kurtosis_picth_arm 0.4996493 0.5019814 0.5019814 0.5008549 0.5014979
## kurtosis_yaw_arm 0.4996244 0.5019918 0.5019918 0.5008236 0.5014246
## skewness_roll_arm 0.4996612 0.5020337 0.5020337 0.5008804 0.5014805
## skewness_pitch_arm 0.4996573 0.5019953 0.5019953 0.5008734 0.5014701
## skewness_yaw_arm 0.4996846 0.5020158 0.5020158 0.5008904 0.5014598
## roll_dumbbell 0.6588673 0.6961325 0.7602064 0.7602064 0.6763427
## pitch_dumbbell 0.6579801 0.7119805 0.7119805 0.6792556 0.6373775
## yaw_dumbbell 0.5543138 0.6190584 0.6190584 0.5659446 0.5874803
## kurtosis_roll_dumbbell 0.4996431 0.5019979 0.5019979 0.5008551 0.5014693
## kurtosis_picth_dumbbell 0.4996891 0.5019892 0.5019892 0.5008380 0.5014383
## kurtosis_yaw_dumbbell 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_dumbbell 0.4996993 0.5020136 0.5020136 0.5008403 0.5014577
## skewness_pitch_dumbbell 0.4996849 0.5019609 0.5019609 0.5008268 0.5014060
## skewness_yaw_dumbbell 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_dumbbell 0.4996612 0.5020178 0.5020178 0.5008463 0.5014621
## min_yaw_dumbbell 0.4996612 0.5020178 0.5020178 0.5008463 0.5014621
## amplitude_yaw_dumbbell 0.4996913 0.5020014 0.5020014 0.5008380 0.5014478
## total_accel_dumbbell 0.5483854 0.6109927 0.5745746 0.6109927 0.5988302
## gyros_dumbbell_x 0.5232314 0.5263436 0.5305536 0.5305536 0.4933295
## gyros_dumbbell_y 0.5412461 0.5496939 0.5641117 0.5641117 0.5534297
## gyros_dumbbell_z 0.5162821 0.5162821 0.5102417 0.5010332 0.5093901
## accel_dumbbell_x 0.6685349 0.6685349 0.6646330 0.6373326 0.6231495
## accel_dumbbell_y 0.6127719 0.6331695 0.6331695 0.6300618 0.5852254
## accel_dumbbell_z 0.6247845 0.6247845 0.6169780 0.5756091 0.5896521
## magnet_dumbbell_x 0.7000778 0.7000778 0.6922764 0.6537327 0.6651159
## magnet_dumbbell_y 0.6425914 0.7026177 0.7026177 0.6547110 0.6659260
## magnet_dumbbell_z 0.6699102 0.5822907 0.6699102 0.6210012 0.6648677
## roll_forearm 0.6049963 0.5772592 0.6049963 0.5790523 0.5591335
## pitch_forearm 0.7978269 0.6874454 0.7071158 0.7978269 0.7083352
## yaw_forearm 0.5183144 0.5654765 0.5786503 0.5786503 0.5669315
## kurtosis_roll_forearm 0.4997041 0.5019812 0.5019812 0.5008398 0.5014273
## kurtosis_picth_forearm 0.4997084 0.5020099 0.5020099 0.5008009 0.5014492
## kurtosis_yaw_forearm 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## skewness_roll_forearm 0.4996947 0.5019737 0.5019737 0.5007879 0.5014352
## skewness_pitch_forearm 0.4996704 0.5019790 0.5019790 0.5008477 0.5014385
## skewness_yaw_forearm 0.4996867 0.5020014 0.5020014 0.5008380 0.5014536
## max_yaw_forearm 0.4997057 0.5019827 0.5019827 0.5008380 0.5014255
## min_yaw_forearm 0.4997057 0.5019827 0.5019827 0.5008380 0.5014255
## amplitude_yaw_forearm 0.4996987 0.5020182 0.5020182 0.5008542 0.5014695
## total_accel_forearm 0.6053719 0.5864724 0.5706206 0.5927630 0.6053719
## gyros_forearm_x 0.5157432 0.5204091 0.5286898 0.5286898 0.5251298
## gyros_forearm_y 0.5125610 0.5125610 0.5101955 0.5075451 0.5082118
## gyros_forearm_z 0.5196666 0.5232374 0.5232374 0.5208738 0.5125013
## accel_forearm_x 0.7538789 0.6626486 0.7053762 0.7538789 0.6621246
## accel_forearm_y 0.5893872 0.5755231 0.5893872 0.5826360 0.5604263
## accel_forearm_z 0.5301225 0.5363562 0.5363562 0.5327640 0.5315553
## magnet_forearm_x 0.7203770 0.6551242 0.6213740 0.7203770 0.6420153
## magnet_forearm_y 0.6240376 0.5800635 0.6466061 0.6466061 0.6117382
## magnet_forearm_z 0.5526250 0.5402176 0.6023436 0.6023436 0.5880043
library(caret)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
# Register a doParallel backend with 2 cores for the resampling done by train().
registerDoParallel(cores = 2)

# Build the modelling frame: numeric columns of training plus the outcome.
# vapply() always returns a logical vector here; sapply()'s return type
# depends on its input. drop = FALSE keeps a data frame even if a single
# numeric column is selected.
numericCols <- vapply(training, is.numeric, logical(1))
dfTemp <- training[, numericCols, drop = FALSE]
dfTemp$classe <- training$classe

# 5-fold cross-validation, repeated 5 times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5
)

# Multinomial model (method = "multinom") fitted on PCA-preprocessed
# predictors, tuned with the resampling scheme above.
lm1 <- train(
  classe ~ .,
  method = "multinom",
  trControl = fitControl,
  preProcess = "pca",
  data = dfTemp
)
## Loading required package: nnet
## # weights: 140 (108 variable)
## initial value 13268.206150
## iter 10 value 10128.419146
## iter 20 value 9921.604378
## iter 30 value 9905.456318
## iter 40 value 9702.675733
## iter 50 value 9640.053564
## iter 60 value 9599.426936
## iter 70 value 9595.803639
## iter 80 value 9591.824567
## iter 90 value 9590.310992
## iter 100 value 9589.986044
## final value 9589.986044
## stopped after 100 iterations
# Evaluate the multinomial model on the data it was fitted on (training set)
lm1Pred <- predict(lm1, newdata = training)
confusionMatrix(data = lm1Pred, reference = training[["classe"]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1614 319 365 118 164
## B 172 738 145 158 257
## C 190 232 739 196 182
## D 301 145 133 712 161
## E 67 161 56 168 751
##
## Overall Statistics
##
## Accuracy : 0.5524
## 95% CI : (0.5416, 0.5632)
## No Information Rate : 0.2843
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4322
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.6886 0.46270 0.51391 0.52663 0.4957
## Specificity 0.8363 0.88991 0.88246 0.89263 0.9328
## Pos Pred Value 0.6256 0.50204 0.48018 0.49036 0.6243
## Neg Pred Value 0.8711 0.87349 0.89575 0.90577 0.8915
## Prevalence 0.2843 0.19347 0.17443 0.16400 0.1838
## Detection Rate 0.1958 0.08952 0.08964 0.08637 0.0911
## Detection Prevalence 0.3130 0.17831 0.18668 0.17613 0.1459
## Balanced Accuracy 0.7624 0.67630 0.69818 0.70963 0.7143
#
# logReg <- lm$finalModel
#
# print(logReg)
The performance of our multinomial regression is very poor to begin with (an accuracy of only 0.5524 on the training set, i.e. an in-sample error of about 0.45). This suggests that I might try a non-linear approach such as Random Forest.
library(caret)
library(doParallel)
# Register a doParallel backend with 2 cores for the resampling done by train().
registerDoParallel(cores = 2)

# Build the modelling frame: numeric columns of training plus the outcome.
# vapply() always returns a logical vector here; sapply()'s return type
# depends on its input. drop = FALSE keeps a data frame even if a single
# numeric column is selected.
numericCols <- vapply(training, is.numeric, logical(1))
dfTemp <- training[, numericCols, drop = FALSE]
dfTemp$classe <- training$classe
#dfTemp$max_yaw_dumbbell <- training$max_yaw_dumbbell

# 3-fold cross-validation, repeated 3 times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 3,
  repeats = 3
)

# Random forest (method = "rf") fitted on PCA-preprocessed predictors,
# tuned with the resampling scheme above.
rf1 <- train(
  classe ~ .,
  method = "rf",
  trControl = fitControl,
  preProcess = "pca",
  data = dfTemp
)
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:Hmisc':
##
## combine
# Extract the final fitted model from the caret train object and print its
# summary (number of trees, mtry, OOB error and confusion matrix).
finMod2 <- rf1[["finalModel"]]
print(finMod2)
##
## Call:
## randomForest(x = x, y = y, mtry = param$mtry)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 3.91%
## Confusion matrix:
## A B C D E class.error
## A 2305 9 9 14 7 0.01663823
## B 50 1496 40 3 6 0.06206897
## C 6 40 1368 21 3 0.04867872
## D 5 4 65 1273 5 0.05843195
## E 2 10 11 12 1480 0.02310231
# Evaluate the random forest on the data it was fitted on (training set)
rf1Pred <- predict(rf1, newdata = training)
confusionMatrix(data = rf1Pred, reference = training[["classe"]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 2344 0 0 0 0
## B 0 1595 0 0 0
## C 0 0 1438 0 0
## D 0 0 0 1352 0
## E 0 0 0 0 1515
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9996, 1)
## No Information Rate : 0.2843
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 1.0000 1.0000 1.0000 1.000 1.0000
## Specificity 1.0000 1.0000 1.0000 1.000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000 1.000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000 1.000 1.0000
## Prevalence 0.2843 0.1935 0.1744 0.164 0.1838
## Detection Rate 0.2843 0.1935 0.1744 0.164 0.1838
## Detection Prevalence 0.2843 0.1935 0.1744 0.164 0.1838
## Balanced Accuracy 1.0000 1.0000 1.0000 1.000 1.0000
# Evaluate the random forest on the held-out validation set
rf1Pred <- predict(rf1, newdata = validation)
confusionMatrix(data = rf1Pred, reference = validation[["classe"]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1648 26 1 4 0
## B 14 1081 43 5 1
## C 3 29 970 48 7
## D 9 1 9 901 12
## E 0 2 3 6 1062
##
## Overall Statistics
##
## Accuracy : 0.9621
## 95% CI : (0.9569, 0.9668)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9521
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9845 0.9491 0.9454 0.9346 0.9815
## Specificity 0.9926 0.9867 0.9821 0.9937 0.9977
## Pos Pred Value 0.9815 0.9449 0.9177 0.9667 0.9897
## Neg Pred Value 0.9938 0.9878 0.9884 0.9873 0.9958
## Prevalence 0.2845 0.1935 0.1743 0.1638 0.1839
## Detection Rate 0.2800 0.1837 0.1648 0.1531 0.1805
## Detection Prevalence 0.2853 0.1944 0.1796 0.1584 0.1823
## Balanced Accuracy 0.9886 0.9679 0.9638 0.9642 0.9896
Now that we have evaluated our two models on our training and validation sets, it is time to see how well they both generalize to our testing set. This will help us decide which model to choose for the programming part of this assignment.
# Random forest: evaluate on the testing set
rf1Pred <- predict(rf1, newdata = testing)
confusionMatrix(data = rf1Pred, reference = testing[["classe"]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1528 25 2 1 1
## B 22 1006 42 4 3
## C 5 26 902 49 7
## D 5 3 10 846 7
## E 2 3 2 0 992
##
## Overall Statistics
##
## Accuracy : 0.9601
## 95% CI : (0.9546, 0.9652)
## No Information Rate : 0.2844
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9496
## Mcnemar's Test P-Value : 3.371e-06
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9782 0.9464 0.9415 0.9400 0.9822
## Specificity 0.9926 0.9840 0.9808 0.9946 0.9984
## Pos Pred Value 0.9814 0.9341 0.9120 0.9713 0.9930
## Neg Pred Value 0.9914 0.9871 0.9876 0.9883 0.9960
## Prevalence 0.2844 0.1935 0.1744 0.1638 0.1839
## Detection Rate 0.2782 0.1831 0.1642 0.1540 0.1806
## Detection Prevalence 0.2835 0.1961 0.1800 0.1586 0.1819
## Balanced Accuracy 0.9854 0.9652 0.9612 0.9673 0.9903
# Multinomial logit: evaluate on the testing set
lm1Pred <- predict(lm1, newdata = testing)
confusionMatrix(data = lm1Pred, reference = testing[["classe"]])
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1090 224 224 90 122
## B 126 472 111 118 181
## C 122 165 487 126 121
## D 186 100 87 444 91
## E 38 102 49 122 495
##
## Overall Statistics
##
## Accuracy : 0.544
## 95% CI : (0.5307, 0.5572)
## No Information Rate : 0.2844
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4208
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.6978 0.44403 0.50835 0.49333 0.49010
## Specificity 0.8321 0.87901 0.88225 0.89898 0.93063
## Pos Pred Value 0.6229 0.46825 0.47698 0.48899 0.61414
## Neg Pred Value 0.8739 0.86823 0.89468 0.90055 0.89012
## Prevalence 0.2844 0.19352 0.17440 0.16384 0.18387
## Detection Rate 0.1984 0.08593 0.08866 0.08083 0.09011
## Detection Prevalence 0.3186 0.18351 0.18587 0.16530 0.14673
## Balanced Accuracy 0.7650 0.66152 0.69530 0.69616 0.71036
Our final results show that our random forest model not only provides better accuracy but also generalizes better to our test set.